Imports and custom functions

In [36]:
import pandas as pd
import seaborn as sns
import plotly
import plotly.express as px
import plotly.graph_objects as go
plotly.offline.init_notebook_mode(connected=True)
import matplotlib.pyplot as plt
In [37]:
def prepare_df(df_path, df_name):
    """Load one tab-separated table and tag its columns with ``df_name``.

    Parameters
    ----------
    df_path : str or path-like
        Tab-separated file with a header row. It must provide the columns
        ``id``, ``FDR`` and ``logCPM``.
    df_name : str
        Suffix appended (after an underscore) to every column except ``id``.

    Returns
    -------
    pandas.DataFrame
        The table with an added ``Significant_<df_name>`` column that holds
        ``"FDR<0.05\\nlogCPM>2"`` for rows with FDR < 0.05 and logCPM > 2 and
        ``"No"`` otherwise; ``id`` stays an ordinary, un-suffixed column.
    """
    table = pd.read_csv(df_path, sep="\t", header=0)

    # Flag the rows that pass both cut-offs; everything else stays "No".
    passes_cutoffs = (table["FDR"] < 0.05) & (table["logCPM"] > 2)
    table["Significant"] = "No"
    table.loc[passes_cutoffs, "Significant"] = "FDR<0.05\nlogCPM>2"

    # Park "id" in the index so that it is the only column left un-suffixed.
    return (
        table
        .set_index("id")
        .rename(columns=lambda column: column + "_" + df_name)
        .reset_index()
    )
In [38]:
def plot_MA(df, df_name):
    """Draw an MA scatter plot (logCPM on x, logFC on y) for one dataset.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the columns ``logCPM_<df_name>``, ``logFC_<df_name>``
        and ``Significant_<df_name>``.
    df_name : str
        Suffix identifying the dataset's columns; also shown in the title.

    Returns
    -------
    None
        The figure is produced as a side effect.
    """
    sns.set(font_scale=2)

    # Scatter only (no regression fit); "No" points are black, points
    # labelled as passing the FDR / logCPM cut-offs are red.
    lmplot_options = dict(
        x=f"logCPM_{df_name}",
        y=f"logFC_{df_name}",
        data=df,
        fit_reg=False,
        hue=f"Significant_{df_name}",
        hue_order=["No", "FDR<0.05\nlogCPM>2"],
        palette=["black", "red"],
        height=15,
        aspect=1,
    )
    sns.lmplot(**lmplot_options)

    # Put the title on the axes that are current after the lmplot call.
    plt.gca().set_title(f"MA plot for {df_name}")
In [39]:
def merge_df(df_1, df_2):
    """Join two tables on ``id`` and add a ``Significant`` column set to "No".

    Parameters
    ----------
    df_1, df_2 : pandas.DataFrame
        Tables that each contain an ``id`` column.

    Returns
    -------
    pandas.DataFrame
        Rows whose ``id`` occurs in both inputs (pandas' default merge),
        with every value of ``Significant`` equal to ``"No"``.
    """
    return df_1.merge(df_2, on="id").assign(Significant="No")
In [40]:
def plot_FC_correlation(df, FC_1_name, FC_2_name, hue, hue_order):
    """Scatter the logFC values of two datasets against each other.

    Parameters
    ----------
    df : pandas.DataFrame
        Merged table holding ``logFC_<FC_1_name>``, ``logFC_<FC_2_name>``
        and the ``hue`` column.
    FC_1_name : str
        Dataset plotted on the x axis.
    FC_2_name : str
        Dataset plotted on the y axis.
    hue : str
        Column used to colour the points.
    hue_order : list of str
        Order of the ``hue`` levels; the levels are paired in this order
        with the colours black, yellow, blue and green.

    Returns
    -------
    None
        The figure is produced as a side effect.
    """
    sns.set(font_scale=2)

    # Scatter only: the regression fit is switched off.
    lmplot_options = dict(
        x=f"logFC_{FC_1_name}",
        y=f"logFC_{FC_2_name}",
        data=df,
        fit_reg=False,
        hue=hue,
        hue_order=hue_order,
        palette=["black", "yellow", "blue", "green"],
        height=15,
        aspect=1,
    )
    sns.lmplot(**lmplot_options)

    # Put the title on the axes that are current after the lmplot call.
    plt.gca().set_title(f"FCs between {FC_1_name} and {FC_2_name}")
In [41]:
def compare(df_1_path, df_2_path, df_1_name, df_2_name):
    """Load two tables, draw an MA plot for each and return them merged.

    Parameters
    ----------
    df_1_path, df_2_path : str or path-like
        Files handed to ``prepare_df``.
    df_1_name, df_2_name : str
        Names used to suffix each table's columns and to title its MA plot.

    Returns
    -------
    pandas.DataFrame
        Result of ``merge_df`` on the two prepared tables. The logFC
        correlation plot is not drawn here.
    """
    prepared_tables = []
    for path, name in ((df_1_path, df_1_name), (df_2_path, df_2_name)):
        table = prepare_df(path, name)
        plot_MA(table, name)
        prepared_tables.append(table)

    return merge_df(*prepared_tables)
In [ ]:
 

Input files

In [42]:
# Each dataset is described by a <prefix>_path (tab-separated 22G table) and a
# <prefix>_name (suffix given to that dataset's columns and used in titles).

# Immunoprecipitation tables.
ip_path = "../WAGO_3_IP/results/DE__N2_Input_Rpph__N2_xf119_Rpph/plot_small_RNAs/22G.tsv"
ip_name = "WAGO-3"

ip_wago_9_path = "../WAGO_9_IP/results/DE__wago-9_input__wago-9_IP/plot_small_RNAs/22G.tsv"
ip_wago_9_name = "WAGO-9"

ip_csr_1_path = "../csr_1_IP/results/DE__csr-1_input__csr-1_IP/plot_small_RNAs/22G.tsv"
ip_csr_1_name = "csr-1"

# Tables under small_RNA_seq_15_C.
Dpf_3_null_path = "../small_RNA_seq_15_C/results/DE__WT_dpf3__Dpf_3_null/plot_small_RNAs/22G.tsv"
Dpf_3_null_name = "Dpf_3_null"

# The original literal had an accidental "//" before plot_small_RNAs.
Dpf_3_S784A_path = "../small_RNA_seq_15_C/results/DE__WT_dpf3__Dpf_3_S784A/plot_small_RNAs/22G.tsv"
Dpf_3_S784A_name = "Dpf_3_S784A"

mut_2_path = "../small_RNA_seq_15_C/results/DE__WT_other__mut_2/plot_small_RNAs/22G.tsv"
mut_2_name = "mut_2"

mut_7_path = "../small_RNA_seq_15_C/results/DE__WT_other__mut_7/plot_small_RNAs/22G.tsv"
mut_7_name = "mut_7"

Dpf_3_null_first_path = "../small_RNA_seq_15_C/results/DE__WT_dpf3_first__Dpf_3_null/plot_small_RNAs/22G.tsv"
Dpf_3_null_first_name = "Dpf_3_null_first"

Dpf_3_S784A_second_path = "../small_RNA_seq_15_C/results/DE__WT_dpf3_second__Dpf_3_S784A/plot_small_RNAs/22G.tsv"
Dpf_3_S784A_second_name = "Dpf_3_S784A_second"

# Tables under small_RNA_seq_25_C (the "_old" datasets).
Dpf_3_null_old_path = "../small_RNA_seq_25_C/results/DE__WT__dpf_3_delta/plot_small_RNAs/22G.tsv"
Dpf_3_null_old_name = "Dpf_3_null_old"

dpf_3S784A_old_path = "../small_RNA_seq_25_C/results/DE__WT__dpf_3S784A/plot_small_RNAs/22G.tsv"
dpf_3S784A_old_name = "Dpf_3_S784A_old"

Comparisons

1. IP vs Dpf_3_null

In [43]:
# Datasets compared in this cell.
df_1_path, df_1_name = ip_path, ip_name
df_2_path, df_2_name = Dpf_3_null_path, Dpf_3_null_name

# Load both tables, draw their MA plots and join them on "id".
df_all = compare(df_1_path, df_2_path, df_1_name, df_2_name)

# Legend labels for the three categories other than "No".
label_1 = f"FDR_{df_1_name}<0.05\nlogCPM_{df_1_name}>2"
label_2 = f"FDR_{df_2_name}<0.05\nlogCPM_{df_2_name}>2"
label_both = f"FDR_{df_1_name}<0.05\nFDR_{df_2_name}<0.05\nlogCPM_{df_1_name}>2\nlogCPM_{df_2_name}>2"

# Rows passing the FDR and logCPM cut-offs in each dataset.
passes_1 = (df_all[f"FDR_{df_1_name}"] < 0.05) & (df_all[f"logCPM_{df_1_name}"] > 2)
passes_2 = (df_all[f"FDR_{df_2_name}"] < 0.05) & (df_all[f"logCPM_{df_2_name}"] > 2)

# Later assignments overwrite earlier ones, so rows passing in both
# datasets end up with the combined label.
df_all["Significant"] = "No"
df_all.loc[passes_1, "Significant"] = label_1
df_all.loc[passes_2, "Significant"] = label_2
df_all.loc[passes_1 & passes_2, "Significant"] = label_both

plot_FC_correlation(df_all, df_1_name, df_2_name,
                    hue="Significant",
                    hue_order=["No", label_1, label_2, label_both])

pd.DataFrame(df_all["Significant"].value_counts())
Out[43]:
Significant
FDR_WAGO-3<0.05\nFDR_Dpf_3_null<0.05\nlogCPM_WAGO-3>2\nlogCPM_Dpf_3_null>2 3379
FDR_WAGO-3<0.05\nlogCPM_WAGO-3>2 863
FDR_Dpf_3_null<0.05\nlogCPM_Dpf_3_null>2 320
No 155

2. IP vs Dpf_3_S784A

In [44]:
# Datasets compared in this cell.
df_1_path, df_1_name = ip_path, ip_name
df_2_path, df_2_name = Dpf_3_S784A_path, Dpf_3_S784A_name

# Load both tables, draw their MA plots and join them on "id".
df_all = compare(df_1_path, df_2_path, df_1_name, df_2_name)

# Legend labels for the three categories other than "No".
label_1 = f"FDR_{df_1_name}<0.05\nlogCPM_{df_1_name}>2"
label_2 = f"FDR_{df_2_name}<0.05\nlogCPM_{df_2_name}>2"
label_both = f"FDR_{df_1_name}<0.05\nFDR_{df_2_name}<0.05\nlogCPM_{df_1_name}>2\nlogCPM_{df_2_name}>2"

# Rows passing the FDR and logCPM cut-offs in each dataset.
passes_1 = (df_all[f"FDR_{df_1_name}"] < 0.05) & (df_all[f"logCPM_{df_1_name}"] > 2)
passes_2 = (df_all[f"FDR_{df_2_name}"] < 0.05) & (df_all[f"logCPM_{df_2_name}"] > 2)

# Later assignments overwrite earlier ones, so rows passing in both
# datasets end up with the combined label.
df_all["Significant"] = "No"
df_all.loc[passes_1, "Significant"] = label_1
df_all.loc[passes_2, "Significant"] = label_2
df_all.loc[passes_1 & passes_2, "Significant"] = label_both

plot_FC_correlation(df_all, df_1_name, df_2_name,
                    hue="Significant",
                    hue_order=["No", label_1, label_2, label_both])

pd.DataFrame(df_all["Significant"].value_counts())
Out[44]:
Significant
FDR_WAGO-3<0.05\nFDR_Dpf_3_S784A<0.05\nlogCPM_WAGO-3>2\nlogCPM_Dpf_3_S784A>2 3992
FDR_WAGO-3<0.05\nlogCPM_WAGO-3>2 469
FDR_Dpf_3_S784A<0.05\nlogCPM_Dpf_3_S784A>2 424
No 70

3. IP vs mut_2

In [45]:
# Datasets compared in this cell.
df_1_path, df_1_name = ip_path, ip_name
df_2_path, df_2_name = mut_2_path, mut_2_name

# Load both tables, draw their MA plots and join them on "id".
df_all = compare(df_1_path, df_2_path, df_1_name, df_2_name)

# Legend labels for the three categories other than "No".
label_1 = f"FDR_{df_1_name}<0.05\nlogCPM_{df_1_name}>2"
label_2 = f"FDR_{df_2_name}<0.05\nlogCPM_{df_2_name}>2"
label_both = f"FDR_{df_1_name}<0.05\nFDR_{df_2_name}<0.05\nlogCPM_{df_1_name}>2\nlogCPM_{df_2_name}>2"

# Rows passing the FDR and logCPM cut-offs in each dataset.
passes_1 = (df_all[f"FDR_{df_1_name}"] < 0.05) & (df_all[f"logCPM_{df_1_name}"] > 2)
passes_2 = (df_all[f"FDR_{df_2_name}"] < 0.05) & (df_all[f"logCPM_{df_2_name}"] > 2)

# Later assignments overwrite earlier ones, so rows passing in both
# datasets end up with the combined label.
df_all["Significant"] = "No"
df_all.loc[passes_1, "Significant"] = label_1
df_all.loc[passes_2, "Significant"] = label_2
df_all.loc[passes_1 & passes_2, "Significant"] = label_both

plot_FC_correlation(df_all, df_1_name, df_2_name,
                    hue="Significant",
                    hue_order=["No", label_1, label_2, label_both])

pd.DataFrame(df_all["Significant"].value_counts())
Out[45]:
Significant
FDR_WAGO-3<0.05\nFDR_mut_2<0.05\nlogCPM_WAGO-3>2\nlogCPM_mut_2>2 3530
FDR_mut_2<0.05\nlogCPM_mut_2>2 434
FDR_WAGO-3<0.05\nlogCPM_WAGO-3>2 8

4. IP vs mut_7

In [46]:
# Datasets compared in this cell.
df_1_path, df_1_name = ip_path, ip_name
df_2_path, df_2_name = mut_7_path, mut_7_name

# Load both tables, draw their MA plots and join them on "id".
df_all = compare(df_1_path, df_2_path, df_1_name, df_2_name)

# Legend labels for the three categories other than "No".
label_1 = f"FDR_{df_1_name}<0.05\nlogCPM_{df_1_name}>2"
label_2 = f"FDR_{df_2_name}<0.05\nlogCPM_{df_2_name}>2"
label_both = f"FDR_{df_1_name}<0.05\nFDR_{df_2_name}<0.05\nlogCPM_{df_1_name}>2\nlogCPM_{df_2_name}>2"

# Rows passing the FDR and logCPM cut-offs in each dataset.
passes_1 = (df_all[f"FDR_{df_1_name}"] < 0.05) & (df_all[f"logCPM_{df_1_name}"] > 2)
passes_2 = (df_all[f"FDR_{df_2_name}"] < 0.05) & (df_all[f"logCPM_{df_2_name}"] > 2)

# Later assignments overwrite earlier ones, so rows passing in both
# datasets end up with the combined label.
df_all["Significant"] = "No"
df_all.loc[passes_1, "Significant"] = label_1
df_all.loc[passes_2, "Significant"] = label_2
df_all.loc[passes_1 & passes_2, "Significant"] = label_both

plot_FC_correlation(df_all, df_1_name, df_2_name,
                    hue="Significant",
                    hue_order=["No", label_1, label_2, label_both])

pd.DataFrame(df_all["Significant"].value_counts())
Out[46]:
Significant
FDR_WAGO-3<0.05\nFDR_mut_7<0.05\nlogCPM_WAGO-3>2\nlogCPM_mut_7>2 3732
FDR_mut_7<0.05\nlogCPM_mut_7>2 463
FDR_WAGO-3<0.05\nlogCPM_WAGO-3>2 58
No 4

5.a. Dpf_3_null vs Dpf_3_S784A with the same WTs (this setup is incorrect, but it is kept to show how the results differ)

In [47]:
# Datasets compared in this section.
df_1_path, df_1_name = Dpf_3_null_path, Dpf_3_null_name
df_2_path, df_2_name = Dpf_3_S784A_path, Dpf_3_S784A_name

# Load both tables, draw their MA plots and join them on "id".
df_all = compare(df_1_path, df_2_path, df_1_name, df_2_name)
In [48]:
# Legend labels for the three categories other than "No".
label_1 = f"FDR_{df_1_name}<0.05\nlogCPM_{df_1_name}>2"
label_2 = f"FDR_{df_2_name}<0.05\nlogCPM_{df_2_name}>2"
label_both = f"FDR_{df_1_name}<0.05\nFDR_{df_2_name}<0.05\nlogCPM_{df_1_name}>2\nlogCPM_{df_2_name}>2"

# Rows passing the FDR and logCPM cut-offs in each dataset.
passes_1 = (df_all[f"FDR_{df_1_name}"] < 0.05) & (df_all[f"logCPM_{df_1_name}"] > 2)
passes_2 = (df_all[f"FDR_{df_2_name}"] < 0.05) & (df_all[f"logCPM_{df_2_name}"] > 2)

# Later assignments overwrite earlier ones, so rows passing in both
# datasets end up with the combined label.
df_all["Significant"] = "No"
df_all.loc[passes_1, "Significant"] = label_1
df_all.loc[passes_2, "Significant"] = label_2
df_all.loc[passes_1 & passes_2, "Significant"] = label_both

plot_FC_correlation(df_all, df_1_name, df_2_name,
                    hue="Significant",
                    hue_order=["No", label_1, label_2, label_both])

pd.DataFrame(df_all["Significant"].value_counts())
Out[48]:
Significant
FDR_Dpf_3_null<0.05\nFDR_Dpf_3_S784A<0.05\nlogCPM_Dpf_3_null>2\nlogCPM_Dpf_3_S784A>2 5338
No 1172
FDR_Dpf_3_S784A<0.05\nlogCPM_Dpf_3_S784A>2 962
FDR_Dpf_3_null<0.05\nlogCPM_Dpf_3_null>2 102

5.b. Dpf_3_null vs Dpf_3_S784A with different WTs

In [49]:
# Datasets compared in this section.
df_1_path, df_1_name = Dpf_3_null_first_path, Dpf_3_null_first_name
df_2_path, df_2_name = Dpf_3_S784A_second_path, Dpf_3_S784A_second_name

# Load both tables, draw their MA plots and join them on "id".
df_all = compare(df_1_path, df_2_path, df_1_name, df_2_name)
In [50]:
# Legend labels for the three categories other than "No".
label_1 = f"FDR_{df_1_name}<0.05\nlogCPM_{df_1_name}>2"
label_2 = f"FDR_{df_2_name}<0.05\nlogCPM_{df_2_name}>2"
label_both = f"FDR_{df_1_name}<0.05\nFDR_{df_2_name}<0.05\nlogCPM_{df_1_name}>2\nlogCPM_{df_2_name}>2"

# Rows passing the FDR and logCPM cut-offs in each dataset.
passes_1 = (df_all[f"FDR_{df_1_name}"] < 0.05) & (df_all[f"logCPM_{df_1_name}"] > 2)
passes_2 = (df_all[f"FDR_{df_2_name}"] < 0.05) & (df_all[f"logCPM_{df_2_name}"] > 2)

# Later assignments overwrite earlier ones, so rows passing in both
# datasets end up with the combined label.
df_all["Significant"] = "No"
df_all.loc[passes_1, "Significant"] = label_1
df_all.loc[passes_2, "Significant"] = label_2
df_all.loc[passes_1 & passes_2, "Significant"] = label_both

plot_FC_correlation(df_all, df_1_name, df_2_name,
                    hue="Significant",
                    hue_order=["No", label_1, label_2, label_both])

pd.DataFrame(df_all["Significant"].value_counts())
Out[50]:
Significant
FDR_Dpf_3_null_first<0.05\nFDR_Dpf_3_S784A_second<0.05\nlogCPM_Dpf_3_null_first>2\nlogCPM_Dpf_3_S784A_second>2 2121
No 942
FDR_Dpf_3_S784A_second<0.05\nlogCPM_Dpf_3_S784A_second>2 838
FDR_Dpf_3_null_first<0.05\nlogCPM_Dpf_3_null_first>2 46

6. Dpf_3_null vs mut_2

In [51]:
# Datasets compared in this section.
df_1_path, df_1_name = Dpf_3_null_path, Dpf_3_null_name
df_2_path, df_2_name = mut_2_path, mut_2_name

# Load both tables, draw their MA plots and join them on "id".
df_all = compare(df_1_path, df_2_path, df_1_name, df_2_name)
In [52]:
# Legend labels for the three categories other than "No".
label_1 = f"FDR_{df_1_name}<0.05\nlogCPM_{df_1_name}>2"
label_2 = f"FDR_{df_2_name}<0.05\nlogCPM_{df_2_name}>2"
label_both = f"FDR_{df_1_name}<0.05\nFDR_{df_2_name}<0.05\nlogCPM_{df_1_name}>2\nlogCPM_{df_2_name}>2"

# Rows passing the FDR and logCPM cut-offs in each dataset.
passes_1 = (df_all[f"FDR_{df_1_name}"] < 0.05) & (df_all[f"logCPM_{df_1_name}"] > 2)
passes_2 = (df_all[f"FDR_{df_2_name}"] < 0.05) & (df_all[f"logCPM_{df_2_name}"] > 2)

# Later assignments overwrite earlier ones, so rows passing in both
# datasets end up with the combined label.
df_all["Significant"] = "No"
df_all.loc[passes_1, "Significant"] = label_1
df_all.loc[passes_2, "Significant"] = label_2
df_all.loc[passes_1 & passes_2, "Significant"] = label_both

plot_FC_correlation(df_all, df_1_name, df_2_name,
                    hue="Significant",
                    hue_order=["No", label_1, label_2, label_both])

pd.DataFrame(df_all["Significant"].value_counts())
Out[52]:
Significant
FDR_Dpf_3_null<0.05\nFDR_mut_2<0.05\nlogCPM_Dpf_3_null>2\nlogCPM_mut_2>2 3653
FDR_mut_2<0.05\nlogCPM_mut_2>2 1103
No 41
FDR_Dpf_3_null<0.05\nlogCPM_Dpf_3_null>2 1

7. Dpf_3_null vs mut_7

In [53]:
# Datasets compared in this section.
df_1_path, df_1_name = Dpf_3_null_path, Dpf_3_null_name
df_2_path, df_2_name = mut_7_path, mut_7_name

# Load both tables, draw their MA plots and join them on "id".
df_all = compare(df_1_path, df_2_path, df_1_name, df_2_name)
In [54]:
# Legend labels for the three categories other than "No".
label_1 = f"FDR_{df_1_name}<0.05\nlogCPM_{df_1_name}>2"
label_2 = f"FDR_{df_2_name}<0.05\nlogCPM_{df_2_name}>2"
label_both = f"FDR_{df_1_name}<0.05\nFDR_{df_2_name}<0.05\nlogCPM_{df_1_name}>2\nlogCPM_{df_2_name}>2"

# Rows passing the FDR and logCPM cut-offs in each dataset.
passes_1 = (df_all[f"FDR_{df_1_name}"] < 0.05) & (df_all[f"logCPM_{df_1_name}"] > 2)
passes_2 = (df_all[f"FDR_{df_2_name}"] < 0.05) & (df_all[f"logCPM_{df_2_name}"] > 2)

# Later assignments overwrite earlier ones, so rows passing in both
# datasets end up with the combined label.
df_all["Significant"] = "No"
df_all.loc[passes_1, "Significant"] = label_1
df_all.loc[passes_2, "Significant"] = label_2
df_all.loc[passes_1 & passes_2, "Significant"] = label_both

plot_FC_correlation(df_all, df_1_name, df_2_name,
                    hue="Significant",
                    hue_order=["No", label_1, label_2, label_both])

pd.DataFrame(df_all["Significant"].value_counts())
Out[54]:
Significant
FDR_Dpf_3_null<0.05\nFDR_mut_7<0.05\nlogCPM_Dpf_3_null>2\nlogCPM_mut_7>2 3825
FDR_mut_7<0.05\nlogCPM_mut_7>2 1116
No 188
FDR_Dpf_3_null<0.05\nlogCPM_Dpf_3_null>2 6

8. Dpf_3_S784A vs mut_2

In [55]:
# Datasets compared in this section.
df_1_path, df_1_name = Dpf_3_S784A_path, Dpf_3_S784A_name
df_2_path, df_2_name = mut_2_path, mut_2_name

# Load both tables, draw their MA plots and join them on "id".
df_all = compare(df_1_path, df_2_path, df_1_name, df_2_name)
In [56]:
# Legend labels for the three categories other than "No".
label_1 = f"FDR_{df_1_name}<0.05\nlogCPM_{df_1_name}>2"
label_2 = f"FDR_{df_2_name}<0.05\nlogCPM_{df_2_name}>2"
label_both = f"FDR_{df_1_name}<0.05\nFDR_{df_2_name}<0.05\nlogCPM_{df_1_name}>2\nlogCPM_{df_2_name}>2"

# Rows passing the FDR and logCPM cut-offs in each dataset.
passes_1 = (df_all[f"FDR_{df_1_name}"] < 0.05) & (df_all[f"logCPM_{df_1_name}"] > 2)
passes_2 = (df_all[f"FDR_{df_2_name}"] < 0.05) & (df_all[f"logCPM_{df_2_name}"] > 2)

# Later assignments overwrite earlier ones, so rows passing in both
# datasets end up with the combined label.
df_all["Significant"] = "No"
df_all.loc[passes_1, "Significant"] = label_1
df_all.loc[passes_2, "Significant"] = label_2
df_all.loc[passes_1 & passes_2, "Significant"] = label_both

plot_FC_correlation(df_all, df_1_name, df_2_name,
                    hue="Significant",
                    hue_order=["No", label_1, label_2, label_both])

pd.DataFrame(df_all["Significant"].value_counts())
Out[56]:
Significant
FDR_Dpf_3_S784A<0.05\nFDR_mut_2<0.05\nlogCPM_Dpf_3_S784A>2\nlogCPM_mut_2>2 4258
FDR_mut_2<0.05\nlogCPM_mut_2>2 589
No 36
FDR_Dpf_3_S784A<0.05\nlogCPM_Dpf_3_S784A>2 4

9. Dpf_3_S784A vs mut_7

In [57]:
# Datasets compared in this section.
df_1_path, df_1_name = Dpf_3_S784A_path, Dpf_3_S784A_name
df_2_path, df_2_name = mut_7_path, mut_7_name

# Load both tables, draw their MA plots and join them on "id".
df_all = compare(df_1_path, df_2_path, df_1_name, df_2_name)
In [58]:
# Legend labels for the three categories other than "No".
label_1 = f"FDR_{df_1_name}<0.05\nlogCPM_{df_1_name}>2"
label_2 = f"FDR_{df_2_name}<0.05\nlogCPM_{df_2_name}>2"
label_both = f"FDR_{df_1_name}<0.05\nFDR_{df_2_name}<0.05\nlogCPM_{df_1_name}>2\nlogCPM_{df_2_name}>2"

# Rows passing the FDR and logCPM cut-offs in each dataset.
passes_1 = (df_all[f"FDR_{df_1_name}"] < 0.05) & (df_all[f"logCPM_{df_1_name}"] > 2)
passes_2 = (df_all[f"FDR_{df_2_name}"] < 0.05) & (df_all[f"logCPM_{df_2_name}"] > 2)

# Later assignments overwrite earlier ones, so rows passing in both
# datasets end up with the combined label.
df_all["Significant"] = "No"
df_all.loc[passes_1, "Significant"] = label_1
df_all.loc[passes_2, "Significant"] = label_2
df_all.loc[passes_1 & passes_2, "Significant"] = label_both

plot_FC_correlation(df_all, df_1_name, df_2_name,
                    hue="Significant",
                    hue_order=["No", label_1, label_2, label_both])

pd.DataFrame(df_all["Significant"].value_counts())
Out[58]:
Significant
FDR_Dpf_3_S784A<0.05\nFDR_mut_7<0.05\nlogCPM_Dpf_3_S784A>2\nlogCPM_mut_7>2 4464
FDR_mut_7<0.05\nlogCPM_mut_7>2 578
No 164
FDR_Dpf_3_S784A<0.05\nlogCPM_Dpf_3_S784A>2 23

10. Dpf_3_null vs Dpf_3_null_old

In [59]:
# Datasets compared in this section.
df_1_path, df_1_name = Dpf_3_null_path, Dpf_3_null_name
df_2_path, df_2_name = Dpf_3_null_old_path, Dpf_3_null_old_name

# Load both tables, draw their MA plots and join them on "id".
df_all = compare(df_1_path, df_2_path, df_1_name, df_2_name)
In [60]:
# Legend labels for the three categories other than "No".
label_1 = f"FDR_{df_1_name}<0.05\nlogCPM_{df_1_name}>2"
label_2 = f"FDR_{df_2_name}<0.05\nlogCPM_{df_2_name}>2"
label_both = f"FDR_{df_1_name}<0.05\nFDR_{df_2_name}<0.05\nlogCPM_{df_1_name}>2\nlogCPM_{df_2_name}>2"

# Rows passing the FDR and logCPM cut-offs in each dataset.
passes_1 = (df_all[f"FDR_{df_1_name}"] < 0.05) & (df_all[f"logCPM_{df_1_name}"] > 2)
passes_2 = (df_all[f"FDR_{df_2_name}"] < 0.05) & (df_all[f"logCPM_{df_2_name}"] > 2)

# Later assignments overwrite earlier ones, so rows passing in both
# datasets end up with the combined label.
df_all["Significant"] = "No"
df_all.loc[passes_1, "Significant"] = label_1
df_all.loc[passes_2, "Significant"] = label_2
df_all.loc[passes_1 & passes_2, "Significant"] = label_both

plot_FC_correlation(df_all, df_1_name, df_2_name,
                    hue="Significant",
                    hue_order=["No", label_1, label_2, label_both])

pd.DataFrame(df_all["Significant"].value_counts())
Out[60]:
Significant
FDR_Dpf_3_null<0.05\nlogCPM_Dpf_3_null>2 2193
FDR_Dpf_3_null<0.05\nFDR_Dpf_3_null_old<0.05\nlogCPM_Dpf_3_null>2\nlogCPM_Dpf_3_null_old>2 2041
No 1380
FDR_Dpf_3_null_old<0.05\nlogCPM_Dpf_3_null_old>2 207

11. Dpf_3_S784A vs Dpf_3_S784A_old

In [61]:
# Datasets compared in this section.
df_1_path, df_1_name = Dpf_3_S784A_path, Dpf_3_S784A_name
df_2_path, df_2_name = dpf_3S784A_old_path, dpf_3S784A_old_name

# Load both tables, draw their MA plots and join them on "id".
df_all = compare(df_1_path, df_2_path, df_1_name, df_2_name)
In [62]:
# Legend labels for the three categories other than "No".
label_1 = f"FDR_{df_1_name}<0.05\nlogCPM_{df_1_name}>2"
label_2 = f"FDR_{df_2_name}<0.05\nlogCPM_{df_2_name}>2"
label_both = f"FDR_{df_1_name}<0.05\nFDR_{df_2_name}<0.05\nlogCPM_{df_1_name}>2\nlogCPM_{df_2_name}>2"

# Rows passing the FDR and logCPM cut-offs in each dataset.
passes_1 = (df_all[f"FDR_{df_1_name}"] < 0.05) & (df_all[f"logCPM_{df_1_name}"] > 2)
passes_2 = (df_all[f"FDR_{df_2_name}"] < 0.05) & (df_all[f"logCPM_{df_2_name}"] > 2)

# Later assignments overwrite earlier ones, so rows passing in both
# datasets end up with the combined label.
df_all["Significant"] = "No"
df_all.loc[passes_1, "Significant"] = label_1
df_all.loc[passes_2, "Significant"] = label_2
df_all.loc[passes_1 & passes_2, "Significant"] = label_both

plot_FC_correlation(df_all, df_1_name, df_2_name,
                    hue="Significant",
                    hue_order=["No", label_1, label_2, label_both])

pd.DataFrame(df_all["Significant"].value_counts())
Out[62]:
Significant
FDR_Dpf_3_S784A<0.05\nlogCPM_Dpf_3_S784A>2 2994
FDR_Dpf_3_S784A<0.05\nFDR_Dpf_3_S784A_old<0.05\nlogCPM_Dpf_3_S784A>2\nlogCPM_Dpf_3_S784A_old>2 2208
No 870
FDR_Dpf_3_S784A_old<0.05\nlogCPM_Dpf_3_S784A_old>2 88

12. IP WAGO-9 vs Dpf_3_null

In [63]:
# Datasets compared in this cell.
df_1_path, df_1_name = ip_wago_9_path, ip_wago_9_name
df_2_path, df_2_name = Dpf_3_null_path, Dpf_3_null_name

# Load both tables, draw their MA plots and join them on "id".
df_all = compare(df_1_path, df_2_path, df_1_name, df_2_name)

# Legend labels for the three categories other than "No".
label_1 = f"FDR_{df_1_name}<0.05\nlogCPM_{df_1_name}>2"
label_2 = f"FDR_{df_2_name}<0.05\nlogCPM_{df_2_name}>2"
label_both = f"FDR_{df_1_name}<0.05\nFDR_{df_2_name}<0.05\nlogCPM_{df_1_name}>2\nlogCPM_{df_2_name}>2"

# Rows passing the FDR and logCPM cut-offs in each dataset.
passes_1 = (df_all[f"FDR_{df_1_name}"] < 0.05) & (df_all[f"logCPM_{df_1_name}"] > 2)
passes_2 = (df_all[f"FDR_{df_2_name}"] < 0.05) & (df_all[f"logCPM_{df_2_name}"] > 2)

# Later assignments overwrite earlier ones, so rows passing in both
# datasets end up with the combined label.
df_all["Significant"] = "No"
df_all.loc[passes_1, "Significant"] = label_1
df_all.loc[passes_2, "Significant"] = label_2
df_all.loc[passes_1 & passes_2, "Significant"] = label_both

plot_FC_correlation(df_all, df_1_name, df_2_name,
                    hue="Significant",
                    hue_order=["No", label_1, label_2, label_both])

pd.DataFrame(df_all["Significant"].value_counts())
Out[63]:
Significant
FDR_Dpf_3_null<0.05\nlogCPM_Dpf_3_null>2 2418
FDR_WAGO-9<0.05\nFDR_Dpf_3_null<0.05\nlogCPM_WAGO-9>2\nlogCPM_Dpf_3_null>2 1588
FDR_WAGO-9<0.05\nlogCPM_WAGO-9>2 867
No 861

13. IP WAGO-9 vs Dpf_3_S784A

In [64]:
# Datasets compared in this cell.
df_1_path, df_1_name = ip_wago_9_path, ip_wago_9_name
df_2_path, df_2_name = Dpf_3_S784A_path, Dpf_3_S784A_name

# Load both tables, draw their MA plots and join them on "id".
df_all = compare(df_1_path, df_2_path, df_1_name, df_2_name)

# Legend labels for the three categories other than "No".
label_1 = f"FDR_{df_1_name}<0.05\nlogCPM_{df_1_name}>2"
label_2 = f"FDR_{df_2_name}<0.05\nlogCPM_{df_2_name}>2"
label_both = f"FDR_{df_1_name}<0.05\nFDR_{df_2_name}<0.05\nlogCPM_{df_1_name}>2\nlogCPM_{df_2_name}>2"

# Rows passing the FDR and logCPM cut-offs in each dataset.
passes_1 = (df_all[f"FDR_{df_1_name}"] < 0.05) & (df_all[f"logCPM_{df_1_name}"] > 2)
passes_2 = (df_all[f"FDR_{df_2_name}"] < 0.05) & (df_all[f"logCPM_{df_2_name}"] > 2)

# Later assignments overwrite earlier ones, so rows passing in both
# datasets end up with the combined label.
df_all["Significant"] = "No"
df_all.loc[passes_1, "Significant"] = label_1
df_all.loc[passes_2, "Significant"] = label_2
df_all.loc[passes_1 & passes_2, "Significant"] = label_both

plot_FC_correlation(df_all, df_1_name, df_2_name,
                    hue="Significant",
                    hue_order=["No", label_1, label_2, label_both])

pd.DataFrame(df_all["Significant"].value_counts())
Out[64]:
Significant
FDR_Dpf_3_S784A<0.05\nlogCPM_Dpf_3_S784A>2 2967
FDR_WAGO-9<0.05\nFDR_Dpf_3_S784A<0.05\nlogCPM_WAGO-9>2\nlogCPM_Dpf_3_S784A>2 2065
No 554
FDR_WAGO-9<0.05\nlogCPM_WAGO-9>2 451

14. IP WAGO-9 vs mut_2

In [65]:
# Datasets compared in this cell.
df_1_path, df_1_name = ip_wago_9_path, ip_wago_9_name
df_2_path, df_2_name = mut_2_path, mut_2_name

# Load both tables, draw their MA plots and join them on "id".
df_all = compare(df_1_path, df_2_path, df_1_name, df_2_name)

# Legend labels for the three categories other than "No".
label_1 = f"FDR_{df_1_name}<0.05\nlogCPM_{df_1_name}>2"
label_2 = f"FDR_{df_2_name}<0.05\nlogCPM_{df_2_name}>2"
label_both = f"FDR_{df_1_name}<0.05\nFDR_{df_2_name}<0.05\nlogCPM_{df_1_name}>2\nlogCPM_{df_2_name}>2"

# Rows passing the FDR and logCPM cut-offs in each dataset.
passes_1 = (df_all[f"FDR_{df_1_name}"] < 0.05) & (df_all[f"logCPM_{df_1_name}"] > 2)
passes_2 = (df_all[f"FDR_{df_2_name}"] < 0.05) & (df_all[f"logCPM_{df_2_name}"] > 2)

# Later assignments overwrite earlier ones, so rows passing in both
# datasets end up with the combined label.
df_all["Significant"] = "No"
df_all.loc[passes_1, "Significant"] = label_1
df_all.loc[passes_2, "Significant"] = label_2
df_all.loc[passes_1 & passes_2, "Significant"] = label_both

plot_FC_correlation(df_all, df_1_name, df_2_name,
                    hue="Significant",
                    hue_order=["No", label_1, label_2, label_both])

pd.DataFrame(df_all["Significant"].value_counts())
Out[65]:
Significant
FDR_mut_2<0.05\nlogCPM_mut_2>2 2478
FDR_WAGO-9<0.05\nFDR_mut_2<0.05\nlogCPM_WAGO-9>2\nlogCPM_mut_2>2 2142
No 9
FDR_WAGO-9<0.05\nlogCPM_WAGO-9>2 2

15. IP WAGO-9 vs mut_7

In [66]:
# Datasets compared in this cell.
df_1_path, df_1_name = ip_wago_9_path, ip_wago_9_name
df_2_path, df_2_name = mut_7_path, mut_7_name

# Load both tables, draw their MA plots and join them on "id".
df_all = compare(df_1_path, df_2_path, df_1_name, df_2_name)

# Legend labels for the three categories other than "No".
label_1 = f"FDR_{df_1_name}<0.05\nlogCPM_{df_1_name}>2"
label_2 = f"FDR_{df_2_name}<0.05\nlogCPM_{df_2_name}>2"
label_both = f"FDR_{df_1_name}<0.05\nFDR_{df_2_name}<0.05\nlogCPM_{df_1_name}>2\nlogCPM_{df_2_name}>2"

# Rows passing the FDR and logCPM cut-offs in each dataset.
passes_1 = (df_all[f"FDR_{df_1_name}"] < 0.05) & (df_all[f"logCPM_{df_1_name}"] > 2)
passes_2 = (df_all[f"FDR_{df_2_name}"] < 0.05) & (df_all[f"logCPM_{df_2_name}"] > 2)

# Later assignments overwrite earlier ones, so rows passing in both
# datasets end up with the combined label.
df_all["Significant"] = "No"
df_all.loc[passes_1, "Significant"] = label_1
df_all.loc[passes_2, "Significant"] = label_2
df_all.loc[passes_1 & passes_2, "Significant"] = label_both

plot_FC_correlation(df_all, df_1_name, df_2_name,
                    hue="Significant",
                    hue_order=["No", label_1, label_2, label_both])

pd.DataFrame(df_all["Significant"].value_counts())
Out[66]:
Significant
FDR_mut_7<0.05\nlogCPM_mut_7>2 2689
FDR_WAGO-9<0.05\nFDR_mut_7<0.05\nlogCPM_WAGO-9>2\nlogCPM_mut_7>2 2255
No 90
FDR_WAGO-9<0.05\nlogCPM_WAGO-9>2 43

16. IP csr-1 vs Dpf_3_null

In [67]:
# Datasets compared in this cell.
df_1_path, df_1_name = ip_csr_1_path, ip_csr_1_name
df_2_path, df_2_name = Dpf_3_null_path, Dpf_3_null_name

# Load both tables, draw their MA plots and join them on "id".
df_all = compare(df_1_path, df_2_path, df_1_name, df_2_name)

# Legend labels for the three categories other than "No".
label_1 = f"FDR_{df_1_name}<0.05\nlogCPM_{df_1_name}>2"
label_2 = f"FDR_{df_2_name}<0.05\nlogCPM_{df_2_name}>2"
label_both = f"FDR_{df_1_name}<0.05\nFDR_{df_2_name}<0.05\nlogCPM_{df_1_name}>2\nlogCPM_{df_2_name}>2"

# Rows passing the FDR and logCPM cut-offs in each dataset.
passes_1 = (df_all[f"FDR_{df_1_name}"] < 0.05) & (df_all[f"logCPM_{df_1_name}"] > 2)
passes_2 = (df_all[f"FDR_{df_2_name}"] < 0.05) & (df_all[f"logCPM_{df_2_name}"] > 2)

# Later assignments overwrite earlier ones, so rows passing in both
# datasets end up with the combined label.
df_all["Significant"] = "No"
df_all.loc[passes_1, "Significant"] = label_1
df_all.loc[passes_2, "Significant"] = label_2
df_all.loc[passes_1 & passes_2, "Significant"] = label_both

plot_FC_correlation(df_all, df_1_name, df_2_name,
                    hue="Significant",
                    hue_order=["No", label_1, label_2, label_both])

pd.DataFrame(df_all["Significant"].value_counts())
Out[67]:
Significant
FDR_Dpf_3_null<0.05\nlogCPM_Dpf_3_null>2 2783
No 1123
FDR_csr-1<0.05\nFDR_Dpf_3_null<0.05\nlogCPM_csr-1>2\nlogCPM_Dpf_3_null>2 788
FDR_csr-1<0.05\nlogCPM_csr-1>2 458

17. IP csr-1 vs Dpf_3_S784A

In [68]:
# Datasets compared in this cell.
df_1_path, df_1_name = ip_csr_1_path, ip_csr_1_name
df_2_path, df_2_name = Dpf_3_S784A_path, Dpf_3_S784A_name

# Load both tables, draw their MA plots and join them on "id".
df_all = compare(df_1_path, df_2_path, df_1_name, df_2_name)

# Legend labels for the three categories other than "No".
label_1 = f"FDR_{df_1_name}<0.05\nlogCPM_{df_1_name}>2"
label_2 = f"FDR_{df_2_name}<0.05\nlogCPM_{df_2_name}>2"
label_both = f"FDR_{df_1_name}<0.05\nFDR_{df_2_name}<0.05\nlogCPM_{df_1_name}>2\nlogCPM_{df_2_name}>2"

# Rows passing the FDR and logCPM cut-offs in each dataset.
passes_1 = (df_all[f"FDR_{df_1_name}"] < 0.05) & (df_all[f"logCPM_{df_1_name}"] > 2)
passes_2 = (df_all[f"FDR_{df_2_name}"] < 0.05) & (df_all[f"logCPM_{df_2_name}"] > 2)

# Later assignments overwrite earlier ones, so rows passing in both
# datasets end up with the combined label.
df_all["Significant"] = "No"
df_all.loc[passes_1, "Significant"] = label_1
df_all.loc[passes_2, "Significant"] = label_2
df_all.loc[passes_1 & passes_2, "Significant"] = label_both

plot_FC_correlation(df_all, df_1_name, df_2_name,
                    hue="Significant",
                    hue_order=["No", label_1, label_2, label_both])

pd.DataFrame(df_all["Significant"].value_counts())
Out[68]:
Significant
FDR_Dpf_3_S784A<0.05\nlogCPM_Dpf_3_S784A>2 3431
FDR_csr-1<0.05\nFDR_Dpf_3_S784A<0.05\nlogCPM_csr-1>2\nlogCPM_Dpf_3_S784A>2 1017
No 660
FDR_csr-1<0.05\nlogCPM_csr-1>2 252

18. IP csr-1 vs mut_2

In [69]:
# Datasets compared in this cell.
df_1_path, df_1_name = ip_csr_1_path, ip_csr_1_name
df_2_path, df_2_name = mut_2_path, mut_2_name

# Load both tables, draw their MA plots and join them on "id".
df_all = compare(df_1_path, df_2_path, df_1_name, df_2_name)

# Legend labels for the three categories other than "No".
label_1 = f"FDR_{df_1_name}<0.05\nlogCPM_{df_1_name}>2"
label_2 = f"FDR_{df_2_name}<0.05\nlogCPM_{df_2_name}>2"
label_both = f"FDR_{df_1_name}<0.05\nFDR_{df_2_name}<0.05\nlogCPM_{df_1_name}>2\nlogCPM_{df_2_name}>2"

# Rows passing the FDR and logCPM cut-offs in each dataset.
passes_1 = (df_all[f"FDR_{df_1_name}"] < 0.05) & (df_all[f"logCPM_{df_1_name}"] > 2)
passes_2 = (df_all[f"FDR_{df_2_name}"] < 0.05) & (df_all[f"logCPM_{df_2_name}"] > 2)

# Later assignments overwrite earlier ones, so rows passing in both
# datasets end up with the combined label.
df_all["Significant"] = "No"
df_all.loc[passes_1, "Significant"] = label_1
df_all.loc[passes_2, "Significant"] = label_2
df_all.loc[passes_1 & passes_2, "Significant"] = label_both

plot_FC_correlation(df_all, df_1_name, df_2_name,
                    hue="Significant",
                    hue_order=["No", label_1, label_2, label_both])

pd.DataFrame(df_all["Significant"].value_counts())
Out[69]:
Significant
FDR_mut_2<0.05\nlogCPM_mut_2>2 3115
FDR_csr-1<0.05\nFDR_mut_2<0.05\nlogCPM_csr-1>2\nlogCPM_mut_2>2 980
FDR_csr-1<0.05\nlogCPM_csr-1>2 8
No 6

19. IP csr-1 vs mut_7

In [70]:
# Datasets compared in this cell.
df_1_path, df_1_name = ip_csr_1_path, ip_csr_1_name
df_2_path, df_2_name = mut_7_path, mut_7_name

# Load both tables, draw their MA plots and join them on "id".
df_all = compare(df_1_path, df_2_path, df_1_name, df_2_name)

# Legend labels for the three categories other than "No".
label_1 = f"FDR_{df_1_name}<0.05\nlogCPM_{df_1_name}>2"
label_2 = f"FDR_{df_2_name}<0.05\nlogCPM_{df_2_name}>2"
label_both = f"FDR_{df_1_name}<0.05\nFDR_{df_2_name}<0.05\nlogCPM_{df_1_name}>2\nlogCPM_{df_2_name}>2"

# Rows passing the FDR and logCPM cut-offs in each dataset.
passes_1 = (df_all[f"FDR_{df_1_name}"] < 0.05) & (df_all[f"logCPM_{df_1_name}"] > 2)
passes_2 = (df_all[f"FDR_{df_2_name}"] < 0.05) & (df_all[f"logCPM_{df_2_name}"] > 2)

# Later assignments overwrite earlier ones, so rows passing in both
# datasets end up with the combined label.
df_all["Significant"] = "No"
df_all.loc[passes_1, "Significant"] = label_1
df_all.loc[passes_2, "Significant"] = label_2
df_all.loc[passes_1 & passes_2, "Significant"] = label_both

plot_FC_correlation(df_all, df_1_name, df_2_name,
                    hue="Significant",
                    hue_order=["No", label_1, label_2, label_both])

pd.DataFrame(df_all["Significant"].value_counts())
Out[70]:
Significant
FDR_mut_7<0.05\nlogCPM_mut_7>2 3317
FDR_csr-1<0.05\nFDR_mut_7<0.05\nlogCPM_csr-1>2\nlogCPM_mut_7>2 1023
No 104
FDR_csr-1<0.05\nlogCPM_csr-1>2 64